# Calculate the daily probability of evaluation in cycle m

calculate.hazard = function(m, s, all.evalcounts, all.evalremaining, all.cumevals, columns, type="kernel", plot=TRUE)
{
	df = as.data.frame(cbind(1:ncol(all.evalcounts[[m]]),all.evalcounts[[m]][s,]))
	names(df) = col.names=c("day","count")
	expanded = expandRows(df,"count")$day

	if(sum(all.evalcounts[[m]][s,],na.rm=TRUE) == 0)
	{
		warning(paste("No ",columns[m], " evaluations at this school: ",s,sep=''))
		return(NULL)
	}

	# In all cases, need to estimate the expected number of evaluations for each day
	# which will be used to estimate the density function of an evaluation
	
	# try using kernel density as prob:
	if(type == "kernel"){
		this.density = density(expanded)
		dd = approxfun(this.density$x, this.density$y)
		expected.n = dd(df$day)*all.cumevals[[m]][s,ncol(all.cumevals[[m]])]
	}

	# try using a moving average as prob:
	if(type == "ma"){
		ma = function(x,n=5){filter(x,rep(1/n,n),sides=1)}
		expected.n = ma(df$count,5)
	}

	# try using mean
	if(type == "uniform"){
		expected.n = mean(all.evalcounts[[m]][s,])
	}

	days.past.completion = which(all.evalremaining[[m]][s,] == 0)
	prob.unscaled = expected.n/all.evalremaining[[m]][s,]
	prob.unscaled[days.past.completion] = 0

	# note that scaling makes this a pdf, not probability
	prob.unscaled[is.na(prob.unscaled)] = 0
	prob = prob.unscaled/sum(prob.unscaled,na.rm=TRUE)
	prob[is.na(prob)] = 0

	cdf = cumsum(prob)
	# because i am interested in the AREA under prob density by day, it is
	# possible for a teacher to have sum(p) > 1 (i.e. if the last day has p=1
	# then their previous days plus the last should mean sum(p) > 1
	sum.p = cumsum(prob.unscaled)

	# now make cdf actual cdf
	cdf[which(cdf>1)] = 1

	haz = prob/(1-cdf)
	haz[which(haz > 1)] = 1
	haz[days.past.completion] = NA

	cum.haz = cumsum(haz)
	

	# This is the strait empirical hazard function

	emp.cdf = all.cumevals[[m]][s,]/max(all.cumevals[[m]][s,])

	emp.haz = rep(NA, ncol(all.cumevals[[m]]))
	emp.haz[1] = cdf[2]
	for(t in 2:(ncol(all.cumevals[[m]])-1))
	{
		emp.haz[t]=1-(1-emp.cdf[t+1])/(1-cdf[t])
	}

	if(plot == TRUE)
	{	
		dev.new(width=18, height=11)
		par(mfrow=c(5,1),mar=c(2,5,2,1))
		df.bar = barplot(all.evalcounts[[m]][s,], main=columns[m], ylab="Number of Evaluations")
		points(x=df.bar, y=expected.n,type='b',pch=16, cex=.75)
		plot(prob.unscaled,type='l',ylim=c(0,1), ylab = "Daily Evaluation Prob")
		plot(cdf, type='l',ylim=c(0,1), ylab="CDF")
		plot(haz, type='l',ylim=c(0,1), ylab="Hazard")
		plot(cum.haz, type='l', ylab="Cum. Hazard", ylim=c(0,7))
		readline()
		dev.off()
	}
	
	return.list = list(hazard=haz, cumulative.hazard=cum.haz, daily.p=prob.unscaled,
		 pdf=prob, sum.p=sum.p, cdf=cdf, empirical.cdf=emp.cdf, empirical.hazard=emp.haz)
	
	return(return.list)
}
